*
* Main file preparing data for analysis
*
* Title: Dominated Choices in a Strategically Simple College Admissions Environment
*
* Authors:
*	Ran I. Shorrer
*	Sandor Sovago
*

* Root directory of the replication package; every other path in this file is
* derived from it.
* NOTE(review): this is a user-specific location -- presumably it has to be
* adjusted before running the package on another machine.
global main_dir "~/Research/Obvious Mistakes/replication_package/"

* Sub-directories of the package:
*   raw_data_dir        raw input files
*   data_dir            interim and final (tidy) data files
*   code_data_prep_dir  the numbered data-preparation scripts run below
*   helpers_dir         helper code (not referenced elsewhere in this file)
global raw_data_dir "${main_dir}/raw_data/"
global data_dir "${main_dir}/tidy_data/"
global code_data_prep_dir "${main_dir}/code_data_preparation/"
global helpers_dir "${code_data_prep_dir}/helpers/"
	
* Make the package root the working directory.
cd "${main_dir}"

* Raw data ---------------------------------------------------------------------
* Globals holding the paths of the raw input files.

* Year-independent inputs. They do not vary by year, so they are defined once
* here instead of being redefined on every pass of the loop below.
global raw_data_income "${raw_data_dir}/Income.dta"
global raw_data_unemployment "${raw_data_dir}/NUTS_unemployment.dta"

* Year-specific inputs, one set of globals per admission year 2009-2014.
forvalues year = 2009/2014 {
	* The 2014 files follow a different naming scheme than the 2009-2013 files.
	if (`year' < 2014) {
		global raw_data_applicants_`year' "${raw_data_dir}/individual_`year'.dta"
		global raw_data_contracts_`year' "${raw_data_dir}/institutional_data_`year'.dta"
		global raw_data_applications_`year' "${raw_data_dir}/application`year'.dta"
	}
	else {
		global raw_data_applicants_`year' "${raw_data_dir}/egyeni_`year'.dta"
		global raw_data_contracts_`year' "${raw_data_dir}/intezmenyi_adatok_`year'.dta"
		global raw_data_applications_`year' "${raw_data_dir}/jelentkezes`year'.dta"
	}

	* Tuition files are only registered for 2013 and 2014.
	* The macro reference is closed right after the macro name, so that the
	* file extension is literal text and not part of the macro name.
	* NOTE(review): the global keeps its existing spelling (tution) because
	* other scripts may refer to it by that name -- confirm before renaming.
	if (`year' >= 2013) {
		global raw_data_tution_`year' "${raw_data_dir}/tuition_`year'.csv"
	}
}

* Interim data -----------------------------------------------------------------
* Globals holding the paths of the intermediate files for 2013 and 2014.
* They are grouped by the data-preparation script named as their output source;
* all of them are deleted again by the clean-up loop at the end of this file.
forvalues year = 2013/2014 {
	* Output: 1_tidyApplicationsWithPriorityScores.do
	global data_applications_ps_`year' "${data_dir}/applications_with_priority_scores_`year'.dta"
	
	* Output: 2_tidyApplicantData.do
	global data_applicants_`year' "${data_dir}/applicants_`year'.dta"
	
	* Output: 3_prepareDataForWelfareAnalysis.do
	global data_contracts_`year' "${data_dir}/contracts_`year'.dta"
	global csv_welfare_analysis_no_err_`year' "${data_dir}/welfare_analysis_no_err_`year'.csv"
	global welfare_analysis_no_err_`year' "${data_dir}/welfare_analysis_no_err_`year'.dta"
	global data_for_welfare_analysis_`year' "${data_dir}/welfare_analysis_`year'.dta"
	
	* Output: 4_runSPDA.do
	global data_matching_spda_`year' "${data_dir}/matching_spda_`year'.dta"
	
	* Output: 5_correctPriorityScoreCutoffs.do
	global correct_ps_cutoffs_`year' "${data_dir}/correct_priority_score_cutoffs_`year'.dta"
	
	* Output: 6_createContractDictionary.do
	global data_contract_dictionary_`year' "${data_dir}/contract_dictionary_`year'.dta"
	
	* Output: 7_identifyDominatedChoices.do
	global data_applications_tidy_`year' "${data_dir}/applications_tidy_`year'.dta"
	global data_applicants_tidy_`year' "${data_dir}/applicants_tidy_`year'.dta"
	global data_drop_`year' "${data_dir}/dominated_dropping_`year'.dta"
	global data_flip_`year' "${data_dir}/dominated_flipping_`year'.dta"
	
	* Output: 8_runSPDAWithNoDominatedChoicesLowerBound.do
	global match_no_dominated_lower_`year' "${data_dir}/matching_no_dominated_lower_`year'.dta"
	
	* Output: 9_runSPDAWithNoDominatedChoicesUpperBound.do
	global match_no_dominated_upper_`year' "${data_dir}/matching_no_dominated_upper_`year'.dta"
	
	* Output: 10_collectSPDAWithNoDominatedChoices.do
	global result_no_dominated_`year' "${data_dir}/result_no_dominated_`year'.dta"
}

* Tidy data --------------------------------------------------------------------
* Paths of the analysis datasets. These globals are not year-specific and are
* not among the files erased by the clean-up loop at the end of this file.
	* Applicant-level data (Tables 2, 3, 4, 5, 6, B1, B2, B3)
	global data_applicant_level_tidy "${data_dir}/data_applicant_level_analysis.dta"

	* Program-level data (Tables 7, 8, B4)
	global data_program_level_tidy "${data_dir}/data_program_level_analysis.dta"
	

* ------------------------------------------------------------------------------
* Data preparation -------------------------------------------------------------
* ------------------------------------------------------------------------------

cd "${main_dir}"

* Per-year pipeline: for 2013 and 2014 the global year is set and the ten
* numbered scripts are run in the order listed below.
* Steps 8 and 9 are the two runs without dominated choices:
*   8 -- state-funded above self-funded
*   9 -- self-funded on top
local prep_scripts ///
	1_tidyApplicationsWithPriorityScores ///
	2_tidyApplicantData ///
	3_prepareDataForWelfareAnalysis ///
	4_runSPDA ///
	5_correctPriorityScoreCutoffs ///
	6_createContractDictionary ///
	7_identifyDominatedChoices ///
	8_runSPDAWithNoDominatedChoicesLowerBound ///
	9_runSPDAWithNoDominatedChoicesUpperBound ///
	10_collectSPDAWithNoDominatedChoices

forvalues year = 2013/2014 {
	global year `year'

	foreach script of local prep_scripts {
		do "${code_data_prep_dir}/`script'.do"
	}
}

* Create applicant-level data
* NOTE(review): presumably written to "${data_applicant_level_tidy}", i.e.
* data_applicant_level_analysis.dta in the tidy-data directory -- confirm
* against the script itself.
do "${code_data_prep_dir}/11_appendSPDAWithNoDominatedChoices.do"

* Create program-level data ("${data_dir}/data_program_level_analysis.dta")
do "${code_data_prep_dir}/12_prepareDataForProgramLevelAnalysis.do"

* Create data for the cost of dominated choices ("${data_dir}/Costly_dominated_choices.dta")
do "${code_data_prep_dir}/13_prepareTuition.do"

* Clean-up: delete the interim files once the final datasets have been built.
* The loop covers 2013 and 2014 only, because the interim-data globals are
* defined for these two years alone. For any earlier year they would expand to
* an empty string and erase would be called without a valid file name.
* erase is deliberately not prefixed with capture, so that a missing interim
* file is reported instead of being silently ignored.
forvalues year = 2013/2014 {
	* The global year selects which set of year-specific globals is expanded.
	global year `year'
	erase "${data_applications_ps_${year}}"
	erase "${data_applicants_${year}}"
	erase "${data_contracts_${year}}"
	erase "${welfare_analysis_no_err_${year}}"
	erase "${csv_welfare_analysis_no_err_${year}}"
	erase "${data_for_welfare_analysis_${year}}"
	erase "${data_matching_spda_${year}}"
	erase "${data_contract_dictionary_${year}}"
	erase "${correct_ps_cutoffs_${year}}"
	erase "${data_applications_tidy_${year}}"
	erase "${data_applicants_tidy_${year}}"
	erase "${data_drop_${year}}"
	erase "${data_flip_${year}}"
	erase "${match_no_dominated_lower_${year}}"
	erase "${match_no_dominated_upper_${year}}"
	erase "${result_no_dominated_${year}}"
}
